cd C:\Users\alpha\Documents\work\burned pixels\ecoregions_countries_climates_continents
import geopandas as gpd
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import numpy as np
import calendar
import matplotlib.cm as cm
import matplotlib.colors as cls
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
#latex
import matplotlib
matplotlib.rcParams['mathtext.fontset'] = 'custom'
matplotlib.rcParams['mathtext.rm'] = 'Times New Roman'
matplotlib.rcParams['mathtext.it'] = 'Times New Roman:italic'
matplotlib.rcParams['mathtext.bf'] = 'Times New Roman:bold'
from scipy.stats import boxcox, probplot
bios = gpd.read_file('biomes_hemispheres')
bios.columns
area = bios['SUM_Area']*1e-6
data_columns = bios.columns[3:3+17*12]
inter_cols = {}
for yr in range(17):
inter_cols[yr] = data_columns[12*yr:12*yr +12]
intra_cols = {}
for mo in range(12):
intra_cols[mo] = [data_columns[mo + yr *12 ] for yr in range(17)]
for yr in range(17):
inter = bios[ inter_cols[yr]].sum(axis=1)/area
label = 'inter_' + str(yr +2001)
bios= bios.assign(label=inter)
bios = bios.rename(columns={'label': label})
inter_columns = bios.columns[-17:]
inter_max = bios[inter_columns].max().max()
for mo in range(12):
intra = bios[ intra_cols[mo]].sum(axis=1)/(17*area)
label = 'intra_{:02}'.format(mo)
bios = bios.assign(label=intra)
bios = bios.rename(columns={'label': label})
intra_columns = bios.columns[-12:]
intra_max = bios[intra_columns].max().max()
def draw_map(df,column,title=None,vmin=0,vmax=1,name='name',dpi=100,cmap='OrRd'):
fig, ax = plt.subplots(figsize=(12, 8))
ax.set_aspect('equal')
ax = df.plot(ax =ax, column=column, cmap=cmap,vmin=vmin, vmax=vmax)
ax.set_axis_off()
norm = cls.Normalize(vmin, vmax)
cmmapable = cm.ScalarMappable(norm, cmap)
cmmapable._A = []
cbaxes = inset_axes(ax, width="80%", height="1%", loc=8)
cb = plt.colorbar(cmmapable, cax=cbaxes,orientation="horizontal")
cb.set_label(title, fontsize=20, family='Times New Roman')
cb.ax.set_yticklabels(cb.ax.get_yticklabels(), fontsize=15, family='Times New Roman')
plt.show()
fig.savefig(name, dpi=dpi, bbox_inches='tight')
''' backup
def draw_map(df,column,title=None,vmin=0,vmax=1,name='name',dpi=100):
fig, ax = plt.subplots(figsize=(12, 8))
ax.set_aspect('equal')
ax = df.plot(ax =ax, column=column, cmap='OrRd',vmin=vmin, vmax=vmax)
ax.set_axis_off()
norm = cls.Normalize(vmin, vmax)
cmmapable = cm.ScalarMappable(norm, 'OrRd')
cmmapable._A = []
cbaxes = inset_axes(ax, width="80%", height="1%", loc=3)
cb = plt.colorbar(cmmapable, cax=cbaxes,orientation="horizontal")
cb.set_label(title, fontsize=20, family='Times New Roman')
cb.ax.set_yticklabels(cb.ax.get_yticklabels(), fontsize=15, family='Times New Roman')
plt.show()
fig.savefig(name, dpi=dpi, bbox_inches='tight')
'''
lambdas = []
for yr in range(17):
column= 'inter_{}'.format(2001 + yr)
test_column, lbd = boxcox(bios[column] + 1e-25)
lambdas.append(lbd)
lmbda= sum(lambdas)/len(lambdas)
lambdas
probplot(bios.inter_2017, plot=plt.gca());
probplot(test_column, plot=plt.gca());
maxs = []
mins = []
for yr in range(17):
column= 'inter_{}'.format(2001 + yr)
test_column = boxcox(bios[column] + 1e-25,lmbda=lmbda)
maxs.append(test_column.max())
mins.append(test_column.min())
box_min = min(mins)
box_delta =max(maxs) - box_min
for yr in range(17):
column = 'inter_{}'.format(yr +2001)
test_column = (boxcox(bios[column] + 1e-25,lmbda=lmbda) - box_min)/box_delta
bios = bios.assign(test_column =test_column)
yr_name = str(yr +2001)
title = r'Burned area index (#/$\mathrm{km}^{2}$) by biome in each hemisphere ' +yr_name
name = 'inter_biomes_nsparts_index_{}'.format(yr +2001)
draw_map(bios,'test_column',title=title,name=name)
lambdas = []
for mo in range(12):
column= 'intra_{:02}'.format(mo)
test_column, lbd = boxcox(bios[column] + 1e-25)
lambdas.append(lbd)
lmbda= sum(lambdas)/len(lambdas)
lambdas
probplot(bios.intra_11, plot=plt.gca());
probplot(test_column, plot=plt.gca());
maxs = []
mins = []
for mo in range(12):
column= column= 'intra_{:02}'.format(mo)
test_column = boxcox(bios[column] + 1e-25,lmbda=lmbda)
maxs.append(test_column.max())
mins.append(test_column.min())
box_min = min(mins)
box_delta =max(maxs) - box_min
for mo in range(12):
column = 'intra_{:02}'.format(mo)
test_column = (boxcox(bios[column] + 1e-25,lmbda=lmbda) - box_min)/box_delta
bios= bios.assign(test_column =test_column)
mo_name = calendar.month_name[mo + 1]
title = r'Burned area index (#/$\mathrm{km}^{2}$) by biome in each hemisphere ' +mo_name + ' (2001-2017)'
name = 'intra_biomes_nsparts_index_{:02}'.format(mo)
draw_map(bios,'test_column',title=title,name=name)
total = bios[data_columns].sum(axis=1)/17
total = total/area
bios= bios.assign(total=total)
column = 'total'
test_column, lbd = boxcox(bios[column] + 1e-25)
test_column = (test_column - test_column.min())/(test_column.max() - test_column.min())
bios = bios.assign(test_column =test_column)
title = r'Burned area index (#/$\mathrm{km}^{2}$) by biome in each hemisphere (2001-2017)'
name = 'total_biomes_nsparts_index'
draw_map(bios,'test_column',title=title,name=name,dpi=600)
inter_medias = bios[inter_columns].mean(axis=1)
matriz = np.array([[4,6,8],[6,8,10]])
matriz - np.array([[0],[2]])
matriz.shape
np.array([[0],[2]]).shape
anomalias = np.array(bios[inter_columns]) - inter_medias.values.reshape(27,1)
bios[inter_columns]
anomalias[0,:]
inter_medias.values.reshape(27,1)
anomalias.max()
for yr in range(17):
test_column = anomalias[:,yr]
bios = bios.assign(test_column =test_column)
yr_name = str(yr +2001)
#"Burnt area pixels (#/km2) by biome in 2001
title = r'Anomalies burned pixels (#/$\mathrm{km}^{2}$) by biome N-S parts in ' +yr_name
#title = r'Burned Area Pixels/$\mathrm{Km}^{2}$ by Climate 2001-2017 {} '.format(calendar.month_name[mo + 1])
#title =(r'ABC123 vs $\mathrm{ABC123}^{123}$')
name = 'inter_anomalies_biomes_nsparts_index_{}'.format(yr +2001)
draw_map(bios,'test_column',title=title,name=name,cmap='seismic',vmin=anomalias.min(),vmax=anomalias.max())
intra_medias = bios[intra_columns].mean(axis=1)
anomalias = np.array(bios[intra_columns]) - intra_medias.values.reshape(27,1)
bigger = abs(anomalias.max()) if abs(anomalias.max()) >= abs(anomalias.min()) else abs(anomalias.min())
vmax = bigger; vmin = -1 * bigger
for mo in range(12):
test_column = anomalias[:,mo]
bios= bios.assign(test_column =test_column)
mo_name = calendar.month_name[mo + 1]
#title = r'Anomalies burned area pixels (#/$\mathrm{km}^{2}$) by biome in each hemisphere ' +mo_name + ' (2001-2017)'
title = r'Anomalies (#/$\mathrm{km}^{2}$) by biome ' +mo_name + ' (2001-2017)'
name = 'intra_anomalies_biomes_nsparts_index_{:02}'.format(mo)
draw_map(bios,'test_column',title=title,name=name,cmap='seismic',vmin=vmin,vmax=vmax,dpi=600)
anomalias.shape
anomalia = bios.total -bios.total.mean()
anomalia
test_column = anomalia
bios= bios.assign(test_column =test_column)
title = r'Anomalies burned area pixels (#/$\mathrm{km}^{2}$) by biome N-S parts in (2001-2017)'
name = 'total_anomalies_biomes_nsparts'
draw_map(bios,'test_column',title=title,name=name,cmap='seismic',vmin=anomalia.min(),vmax=anomalia.max())
def test_map(df,column,title=None,vmin=0,vmax=1,name='name',dpi=100,cmap='OrRd'):
fig, ax = plt.subplots(figsize=(12, 8))
ax.set_aspect('equal')
ax = df.plot(ax =ax, column=column, cmap=cmap,vmin=vmin, vmax=vmax)
ax.set_axis_off()
norm = cls.Normalize(vmin, vmax)
cmmapable = cm.ScalarMappable(norm, cmap)
cmmapable._A = []
cbaxes = inset_axes(ax, width="80%", height="1%", loc=8)
cb = plt.colorbar(cmmapable, cax=cbaxes,orientation="horizontal")
cb.set_label(title, fontsize=20, family='Times New Roman')
cb.ax.set_yticklabels(cb.ax.get_yticklabels(), fontsize=15, family='Times New Roman')
fig.tight_layout()
plt.show()
fig.savefig(name, dpi=dpi, bbox_inches='tight')
test_map(bios,'test_column',title='teste',name='teste_tight',cmap='seismic',vmin=vmin,vmax=vmax,dpi=600)
bios.columns
len(bios.columns)
relevant_columns = bios.columns[1:-3]
relevant_columns
bios = bios [relevant_columns]
type( bios)
bios
from pandas import ExcelWriter
import xlsxwriter
#writer = ExcelWriter('results_newstyle2.xlsx',engine='xlsxwriter')
#df_finall.to_excel(writer,sheet_name='Burned Pixels',engine='xlsxwriter')
#writer.save()
bios['SUM_Area'] = bios['SUM_Area'].apply (lambda x:x/1e6)
bio_names = pd.read_excel('biomas_nomes.xlsx')
bio_names.set_index('BIOME_NUM')
type(bio_names.loc[8].values[1])
def bioname(bionum):
return bio_names.loc[bionum].values[1]
bionames = bios['BIOME_NUM'].apply(lambda x:bioname(x))
bios = bios.assign(biome_names = bionames)
bios.columns
new_columns =[ bios.columns[0]] +[ bios.columns[-1]] + list(bios.columns[1:-1] )
new_columns
bios = bios[new_columns]
data_columns = bios.columns[3:-1]
data_columns
writer = ExcelWriter('biome_analysis.xlsx',engine='xlsxwriter')
bios.to_excel(writer,sheet_name='original',engine='xlsxwriter', index= False)
inter_cols = {}
for yr in range(17):
inter_cols[yr] = data_columns[12*yr:12*yr +12]
intra_cols = {}
for mo in range(12):
intra_cols[mo] = [data_columns[mo + yr *12 ] for yr in range(17)]
area = bios['SUM_Area']
for yr in range(17):
inter = bios[ inter_cols[yr]].sum(axis=1)/area
label = 'inter_' + str(yr +2001)
bios= bios.assign(label=inter)
bios = bios.rename(columns={'label': label})
inter_columns = bios.columns[-17:]
inter_columns
for mo in range(12):
intra = bios[ intra_cols[mo]].sum(axis=1)/area
label = 'intra_{:02}'.format(mo)
bios = bios.assign(label=intra)
bios = bios.rename(columns={'label': label})
intra_columns = bios.columns[-12:]
intra_columns
intra_cols
total = bios[data_columns].sum(axis=1)
total_area = bios[data_columns].sum(axis=1)/area
colunas_hemis = list(bios.columns[:3]) + ['SUM_Area']
colunas_hemis
table_hemis = bios[colunas_hemis]
table_hemis = table_hemis.assign(total = total)
table_hemis = table_hemis.assign(total_area = total_area)
table_hemis['biome_names'][11]
table_hemis.to_excel(writer,sheet_name='totais',engine='xlsxwriter', index= False)
writer.save()
table_hemis
table_hemis.columns
dict_agg = {'is_north': 'first', 'biome_names': 'first', 'SUM_Area': 'sum', 'total':'sum'}
table_hemis = table_hemis[['BIOME_NUM', 'biome_names', 'is_north', 'SUM_Area', 'total']].groupby('BIOME_NUM').agg(dict_agg).drop('is_north', axis=1)
total_area= table_hemis['total']/ table_hemis['SUM_Area']
table_hemis = table_hemis.assign(total_area = total_area)
table_hemis
writer = ExcelWriter('biome_analysis2.xlsx',engine='xlsxwriter')
table_hemis.to_excel(writer,sheet_name='totais2',engine='xlsxwriter')
writer.save()
from openpyxl import load_workbook
wb = load_workbook('biome_analysis.xlsx')
writer = pd.ExcelWriter('biome_analysis.xlsx',engine='openpyxl')
writer.book = wb
writer.sheets = dict((ws.title, ws) for ws in wb.worksheets)
table_hemis.to_excel(writer, sheet_name='total_2')
writer.save()
#código acima, supostamente abria ficheiro excel já existente, e acrescentava uma nova folha. na verdade, acrescenteu uma
#nova folha mas não ao ficheiro já existente
bios
bios[inter_columns]['inter_2001'].plot(kind='bar')
bios.columns[:3]
labels = ['{} {}'.format( int(bios['BIOME_NUM'].iloc[i]), bios['is_north'].iloc[i].upper()) for i in bios.index ]
labels
ax = bios[inter_columns].plot(kind='bar')
ax.set_xticklabels( labels);plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
#fig = plt.gcf()
plt.figure(figsize=(16,24))
plt.show()
ax
ax.set_xlabel =labels;plt.show()
fig, ax = plt.subplots(figsize=(24,16))
bios[inter_columns].plot(kind='bar',ax= ax)
ax.set_xticklabels( labels, rotation =None,fontsize=15)
plt.legend(fontsize=15)
plt.show()
fig, ax = plt.subplots(figsize=(24,16))
bios[inter_columns].plot(kind='bar',ax= ax)
ax.set_xticklabels( labels, rotation =None,fontsize=15)
plt.legend(fontsize=15)
plt.show()
#fig, axes =plt.subplots(3,6,figsize =(24,16))
bios[inter_columns].plot(kind='bar',subplots=True, grid=True,title="Variação interanual", layout=(6, 3), sharex=True, sharey=True, legend = False, )
[ax.set_xticklabels(labels) for ax in plt.gcf().axes]
plt.tight_layout()
plt.show()
'''
fig, axes =plt.subplots(6,3,figsize =(24,16))
bios[inter_columns].plot(axes =axes ,kind='bar',title="Variação interanual", legend = False, )
[ax.set_xticklabels(labels) for ax in plt.gcf().axes]
plt.tight_layout()
plt.show()
'''
'''
fig, axes = plt.subplots(nrows=6, ncols=3)
for i, c in enumerate(bios[inter_columns].columns):
bios[inter_columns][c].plot(kind='bar', ax=axes[i])
plt.tight_layout()
plt.show()
'''
for i, c in enumerate(bios[inter_columns].columns):
bios[inter_columns][c].plot(kind='bar', title = c[-4:])
plt.gca().set_xticklabels( labels)
plt.show()
# não fucinona por causa das areas
#bios[list(inter_columns) + ['BIOME_NUM']].groupby('BIOME_NUM').sum()
fig, ax = plt.subplots(figsize=(24,16))
bios[intra_columns].plot(kind='bar',ax= ax)
ax.set_xticklabels( labels, rotation =None,fontsize=15)
plt.legend(fontsize=15)
plt.show()
month_names
for i, c in enumerate(bios[intra_columns].columns):
bios[intra_columns][c].plot(kind='bar', title = calendar.month_name[int(c[-2:]) +1][:3])
plt.gca().set_xticklabels( labels)
plt.show()
calendar.month_name[int('intra_00'[-2:]) +1][:3]
bios.columns[:-17-12]
biomes = bios[bios.columns[:-17-12]]
biomes = biomes.groupby('BIOME_NUM').sum()
biomes.columns
inter_cols_t = {}
for yr in range(17):
inter_cols_t[yr] = biomes.columns[12*yr:12*yr +12]
area_t = biomes['SUM_Area']
for yr in range(17):
inter = biomes[ inter_cols[yr]].sum(axis=1)/area_t
label = 'inter_' + str(yr +2001)
biomes= biomes.assign(label=inter)
biomes = biomes.rename(columns={'label': label})
inter_columns_t = biomes.columns[-17:]
inter_columns_t
fig, ax = plt.subplots(figsize=(24,16))
biomes[inter_columns_t].plot(kind='bar',ax= ax)
ax.set_xticklabels( range(15), rotation =None,fontsize=15)
plt.legend(fontsize=15)
plt.show()
for i, c in enumerate(biomes[inter_columns_t].columns):
biomes[inter_columns_t][c].plot(kind='bar', title = c[-4:])
#plt.gca().set_xticklabels( labels)
plt.show()
biomes[inter_columns_t].iloc[0].plot(kind='bar', title =table_hemis.iloc[0]['biome_names']); plt.gca().set_xticklabels( range(2001,2018))
table_hemis.iloc[0]['biome_names']
for i in range(15):
biomes[inter_columns_t].iloc[i].plot(kind='bar', title =table_hemis.iloc[i]['biome_names'])
plt.gca().set_xticklabels( range(2001,2018))
plt.show()
intra_cols_t = {}
for mo in range(12):
intra_cols_t[mo] = [biomes.columns[mo + yr *12 ] for yr in range(17)]
intra_cols_t
for mo in range(12):
intra = bios[ intra_cols[mo]].sum(axis=1)/area
label = 'intra_{:02}'.format(mo)
bios = bios.assign(label=intra)
bios = bios.rename(columns={'label': label})
#calendar.month_name[int(c[-2:]) +1][:3]
months_names = [calendar.month_name[i +1][:3] for i in range(12)]
for i in range(bios.shape[0]):
title ='{} {}'.format(table_hemis.iloc[int(bios.iloc[i]['BIOME_NUM'])]['biome_names'],bios.iloc[i]['is_north'].upper())
bios[intra_columns].iloc[i].plot(kind='bar', title=title)
plt.gca().set_xticklabels( months_names)
plt.show()
bios.shape[0]
i= 3
'{} {}'.format(table_hemis.iloc[bios.iloc[i]['BIOME_NUM']]['biome_names'],bios.iloc[i]['is_north'].upper())
table_hemis.iloc[bios.iloc[i]['BIOME_NUM']]['biome_names']
table_hemis.iloc[bios.iloc[i]['BIOME_NUM']]